# Project root (used to build all file paths below)
home <- here::here()

# Load libraries
library(tidyverse)
library(tidylog)   # verbose logging of dplyr/tidyr operations
library(RCurl)
library(gllvm)
library(RColorBrewer)
library(ggrepel)
library(vegan)
library(patchwork)
library(devtools)  # provides source_url()
library(ggpubr)
library(janitor)
library(brms)
library(modelr)
library(tidybayes)
library(ggstats)
library(broom)
library(broom.mixed)
# NOTE(review): removed a duplicated library(RCurl) call (it was loaded twice)

# Source map-plot (defines plot_map and theme_sleek used below)
source_url("https://raw.githubusercontent.com/maxlindmark/cod-interactions/main/R/functions/map-plot.R")
#source(paste0(home, "R/functions/map-plot.R"))

# Source code for rotating ordination plot (to make it work in ggplot)
source_url("https://raw.githubusercontent.com/maxlindmark/cod-interactions/main/R/functions/rotate.R")
#source(paste0(home, "R/functions/rotate.R"))

# Diet (dis)-similarity
# Load packages (done above)

# Read data: one row per predator stomach with aggregated prey weights
d <- read_csv(paste0(home, "/data/clean/aggregated_stomach_data.csv"))

# Diet similarity using bar plots and ordination

# Ordination using gllvm
# Keep the prey-weight columns plus the predator id, then attach predator
# metadata (length, weight, species) via a 1:1 join on pred_id
d2 <- d |>
  dplyr::select(ends_with("_tot"))

d2$pred_id <- d$pred_id

d2 <- d2 |>
  left_join(
    d |> dplyr::select(pred_length_cm, pred_weight_g, pred_id, species),
    by = "pred_id"
  )
# Calculate relative prey weight (prey weight / predator body weight) and
# average it by species, 2 cm size class, and prey group
d3 <- d2 |>
  pivot_longer(ends_with("_tot")) |>
  mutate(value = value / pred_weight_g) |>
  filter(pred_length_cm >= 10 & pred_length_cm <= 50) |> # These are the sizes we work with
  # cut_width() gives labels like "(11,13]"; parse out the lower bound
  mutate(pred_length_cm = cut_width(pred_length_cm, 2),
         pred_length_cm = str_remove(pred_length_cm, "[(]")) |>
  separate_wider_delim(pred_length_cm, delim = ",", names = c("pred_length_cm", "scrap")) |>
  # The first interval is closed on the left ("[9,11]"), so strip its bracket too
  mutate(pred_length_cm = ifelse(pred_length_cm == "[9", "9", pred_length_cm),
         pred_length_cm = as.numeric(pred_length_cm)) |>
  dplyr::select(-scrap) |>
  group_by(species, pred_length_cm, name) |>
  summarise(tot_prey_weight = mean(value)) |>
  ungroup() |>
  # Tidy prey names: all underscores to spaces (str_replace_all replaces the
  # previous pair of single str_replace calls), drop "_tot" suffix, title case
  mutate(name = str_replace_all(name, "_", " "),
         name = str_remove(name, " tot"),
         name = str_to_title(name)) |>
  # Drop flounder size classes with too few stomachs
  filter(!(species == "Flounder" & pred_length_cm %in% c(45, 43, 39, 9)))
#Sample size per 2 cm class?
# d3 |> filter(species == "Flounder") |> distinct(pred_length_cm) |> arrange(pred_length_cm)
#d2 |> pivot_longer(ends_with("_tot")) |> summarise(n = n(), .by = pred_id) |> distinct(n)
# t <- d2 |>
# pivot_longer(ends_with("_tot")) |>
# mutate(value = value/pred_weight_g) |>
# filter(pred_length_cm >= 10 & pred_length_cm <= 50) |> # These are the sizes we work with
# mutate(pred_length_cm = cut_width(pred_length_cm, 2),
# pred_length_cm = str_remove(pred_length_cm, "[(]")) |>
# separate_wider_delim(pred_length_cm, delim = ",", names = c("pred_length_cm", "scrap")) |>
# mutate(pred_length_cm = ifelse(pred_length_cm == "[9", "9", pred_length_cm),
# pred_length_cm = as.numeric(pred_length_cm)) |>
# dplyr::select(-scrap) |>
# filter(!(species == "Flounder" & pred_length_cm %in% c(45, 43, 39, 9))) |>
# summarise(n = n(), .by = c(species, pred_length_cm)) |>
# mutate(n = n/15) |>
# arrange(n)
# t
# summary(t)
# ggplot(t, aes(pred_length_cm, n, color = species)) +
# geom_point()
# Inspect the smallest non-zero relative prey weights
d3 |>
  filter(tot_prey_weight > 0) |>
  arrange(tot_prey_weight)
# Wide table: one row per species x size class, one column per prey group
d_wide <- d3 |>
  pivot_wider(names_from = name, values_from = tot_prey_weight)

# Response matrix for gllvm (drop the id columns, keep only prey weights)
d_mat <- d_wide |>
  dplyr::select(-species, -pred_length_cm) |>
  as.matrix()

str(d_mat)
# Fit gllvm model with two latent variables and no covariates, meaning it
# becomes an unconstrained ordination. The Tweedie family suits the
# zero-inflated continuous prey weights.
set.seed(999)
fit <- gllvm(y = d_mat, family = "tweedie", num.lv = 2)

fit
# QQ plot of Dunn-Smyth residuals: ggplot version of plot(fit, which = 2)
res <- residuals(fit)
p <- NCOL(fit$y)
sppind <- seq_len(p)  # seq_len is safe if p were ever 0 (1:p would give c(1, 0))
ds_res <- res$residuals[, sppind]

# Compare with built-in plot from gllvm: plot(fit, which = 2)
ds_res |>
  as.data.frame() |>
  pivot_longer(cols = everything()) |>
  ggplot(aes(sample = value)) +
  geom_qq(size = 0.8) +
  geom_qq_line() +
  labs(y = "Sample quantiles",
       x = "Theoretical quantiles")

ggsave(paste0(home, "/figures/supp/qq_ordi_gllvm.pdf"), width = 11, height = 11, units = "cm")
# Plot with ggplot
# Now make the ordination plot. We want to use ggplot, so need to hack it a bit
# See this thread: https://github.com/JenniNiku/gllvm/discussions/116
# ordiplot(fit, biplot = TRUE)

# Extract latent-variable site scores ("<-" replaces the earlier "=" assignment)
LVs <- as.data.frame(getLV(fit))

# Bind LV site scores to metadata
# LVs = cbind(LVs, sim.meta)
# ordiplot(fittw, biplot = TRUE)
# ordiplot(fittw, biplot = TRUE, symbols = TRUE, spp.colors = "white")

# Rotate so the ggplot ordination matches gllvm's ordiplot orientation
LVs2 <- rotate(fit)
# Prey loadings for the labels in the biplot
labs <- LVs2$species |>
  as.data.frame() |>
  rownames_to_column(var = "prey")

# Site scores with predator metadata; classify rows into three predator groups
dd <- LVs2$sites |>
  as.data.frame() |>
  bind_cols(d_wide |> dplyr::select(species, pred_length_cm)) |>
  mutate(group = "Flounder",
         group = ifelse(species == "Cod" & pred_length_cm <= 25, "Small cod", group),
         group = ifelse(species == "Cod" & pred_length_cm > 25, "Large cod", group))

# Alternative palettes for the groups
pal <- brewer.pal(n = 11, name = "RdYlBu")[c(11, 4, 1)]
#pal <- (brewer.pal(n = 11, name = "RdYlGn")[c(11, 4, 1)])
#pal <- (brewer.pal(n = 8, name = "Dark2")[c(8, 7, 6)])
#pal <- brewer.pal(n = 8, name = "Dark2")[c(2, 7, 6)]
# Ordination biplot: sites colored by predator group, sized by length,
# prey-group loadings added as repelled labels
ordi <- ggplot(dd, aes(x = V1, y = V2,
                       color = factor(group, levels = c("Flounder", "Small cod", "Large cod")),
                       size = pred_length_cm)) +
  geom_point(alpha = 0.6) +
  # stat_ellipse(aes(V1, V2, color = group),
  #              inherit.aes = FALSE, linewidth = 1, alpha = 0.3) +
  scale_radius(range = c(0.8, 4)) +
  scale_color_manual(values = pal) +
  scale_fill_manual(values = pal) +
  labs(size = "Predator length [cm]", shape = "Species", fill = "Group", color = "",
       x = "Latent variable 1", y = "Latent variable 2") +
  geom_label_repel(data = labs, aes(V1, V2, label = prey),
                   color = "grey20", inherit.aes = FALSE, size = 2, alpha = 0.4,
                   box.padding = 0.25) +
  theme_sleek() +
  coord_cartesian(#xlim = c(-3, 3),
                  ylim = c(-2.5, 2.5)) +
  guides(color = guide_legend(ncol = 4),
         size = guide_legend(ncol = 4)) +
  theme(legend.position = "bottom",
        legend.box = "vertical",
        legend.margin = margin(-0.3, 0, 0, 0, unit = "cm"))

ordi

# Make a plot of the ontogenetic development of diets
# Calculate relative prey weight and average per 1 cm length (no size filter)
# NOTE(review): this overwrites the size-class d3 used for the ordination above
d3 <- d2 |>
  pivot_longer(ends_with("_tot")) |>
  mutate(value = value / pred_weight_g) |>
  group_by(species, pred_length_cm, name) |>
  summarise(tot_prey_weight = mean(value)) |>
  ungroup() |>
  # Tidy prey names, same convention as above
  mutate(name = str_replace_all(name, "_", " "),
         name = str_remove(name, " tot"),
         name = str_to_title(name))
#filter(tot_prey_weight < quantile(tot_prey_weight, probs = 0.995))
# Pool all cod larger than max_size_cod into the largest 5 cm size group
max_size_cod <- 65

cod_important_prey <- d3 |>
  filter(species == "Cod") |>
  mutate(pred_length_cm2 = ifelse(pred_length_cm > max_size_cod, max_size_cod - 1, pred_length_cm)) |>
  mutate(predator_length_grp = cut(pred_length_cm2, breaks = seq(0, 100, by = 5))) |>
  group_by(name, predator_length_grp) |>
  summarise(prey_group_tot = sum(tot_prey_weight)) |>
  ungroup() |>
  # Diet proportion of each prey group within a size group
  group_by(predator_length_grp) |>
  mutate(prop = prey_group_tot / sum(prey_group_tot)) |>
  ungroup() |>
  # Upper bound of the "(a,b]" interval label. The regex handles 1-3 digit
  # bounds; the old substr(x, 5, 6) parsing warned about NAs, needed per-level
  # ifelse patches, and misread "(95,100]" as 10.
  mutate(max_size = as.numeric(sub(".*,([0-9]+)\\]$", "\\1", predator_length_grp)),
         predator = "Cod")
# Pool all flounder larger than max_size_fle into the largest 5 cm size group
max_size_fle <- 40

fle_important_prey <- d3 |>
  filter(species == "Flounder") |>
  mutate(pred_length_cm2 = ifelse(pred_length_cm > max_size_fle, max_size_fle - 1, pred_length_cm)) |>
  mutate(predator_length_grp = cut(pred_length_cm2, breaks = seq(0, 100, by = 5))) |>
  group_by(name, predator_length_grp) |>
  summarise(prey_group_tot = sum(tot_prey_weight)) |>
  ungroup() |>
  # Diet proportion of each prey group within a size group
  group_by(predator_length_grp) |>
  mutate(prop = prey_group_tot / sum(prey_group_tot)) |>
  ungroup() |>
  # Upper bound of the "(a,b]" interval label (replaces the warning-prone
  # substr hack and its per-level ifelse patches)
  mutate(max_size = as.numeric(sub(".*,([0-9]+)\\]$", "\\1", predator_length_grp)),
         predator = "Flounder")
# Stack the two species; size groups without a given prey get proportion 0
area_plot <- bind_rows(fle_important_prey, cod_important_prey) |>
  mutate(prop = replace_na(prop, 0))

# One color per prey category, interpolated from the 12-color "Paired" palette
n_cat <- nrow(area_plot |> distinct(name))
colourCount <- n_cat
getPalette <- colorRampPalette(brewer.pal(12, "Paired"))
pal2 <- getPalette(colourCount)

# List the prey categories going into the palette
area_plot |> distinct(name)
# Dataframes for geom_text with sample size: number of unique stomachs per
# 5 cm size group (n_distinct replaces length(unique(...)))
n_cod <- d2 |>
  filter(species == "Cod") |>
  mutate(pred_length_cm2 = ifelse(pred_length_cm > max_size_cod, max_size_cod - 1, pred_length_cm)) |>
  mutate(predator_length_grp = cut(pred_length_cm2, breaks = seq(0, 100, by = 5))) |>
  group_by(predator_length_grp) |>
  summarise(n = n_distinct(pred_id))

n_fle <- d2 |>
  filter(species == "Flounder") |>
  mutate(pred_length_cm2 = ifelse(pred_length_cm > max_size_fle, max_size_fle - 1, pred_length_cm)) |>
  mutate(predator_length_grp = cut(pred_length_cm2, breaks = seq(0, 100, by = 5))) |>
  group_by(predator_length_grp) |>
  summarise(n = n_distinct(pred_id))
# Combine sample sizes and parse the interval upper bound for the x position.
# The regex replaces the warning-prone substr(x, 5, 6) parsing and its
# per-level ifelse patches.
n_dat <- bind_rows(n_cod |> mutate(predator = "Cod"),
                   n_fle |> mutate(predator = "Flounder")) |>
  mutate(max_size = as.numeric(sub(".*,([0-9]+)\\]$", "\\1", predator_length_grp)))
# Sentence case for the legend labels
area_plot <- area_plot |>
  mutate(name = str_to_sentence(name))

# Stacked bar plot of diet proportions by predator size, faceted by predator
bar_diet <-
  ggplot(data = area_plot, aes(x = max_size, y = prop, fill = name, color = name)) +
  geom_col(width = 4.3) +
  # Invisible (size 0, white) text placed higher than the visible labels to
  # pad the y-range so the sample-size labels are not clipped
  geom_text(data = n_dat, aes(x = max_size, y = 1.08, label = n), inherit.aes = FALSE,
            size = 0, color = "white") +
  geom_text(data = n_dat, aes(x = max_size, y = 1.04, label = n), inherit.aes = FALSE,
            size = 2) +
  facet_wrap(~predator, scales = "free") +
  scale_fill_manual(values = pal2, name = "") +
  scale_color_manual(values = pal2, name = "") +
  coord_cartesian(expand = 0) +
  scale_x_continuous(breaks = seq(0, 100, 5)) +
  labs(y = "Proportion", x = "Max. predator size in group [cm]") +
  theme(legend.key.size = unit(0.2, 'cm'),
        #legend.text = element_text(face = "italic"),
        legend.position = "bottom",
        legend.margin = margin(-0.3, 0, 0, 0, unit = "cm"))

# Combine plots
# Stack the bar plot over the ordination and save.
# NOTE(review): in the pasted source the ggsave() call was fused onto the same
# line after a "#" comment and therefore never ran; restored as its own line.
bar_diet / ordi + plot_annotation(tag_levels = "a") #+ plot_layout(heights = c(1, 1.6))

ggsave(paste0(home, "/figures/ordi_diet.pdf"), width = 17, height = 21, units = "cm")

# Schoener's overlap index
# Total prey weight and number of stomachs per year/quarter/ICES rectangle and
# predator group; keep only cells with more than 3 stomachs
qyear_rect_sum <- d2 |>
  pivot_longer(ends_with("_tot")) |>
  filter(pred_length_cm > 10 & pred_length_cm <= 50) |> # These are the sizes we work with
  left_join(d |> dplyr::select(year, quarter, subdiv, ices_rect, X, Y, pred_id), by = "pred_id") |>
  mutate(group = "Flounder",
         group = ifelse(species == "Cod" & pred_length_cm <= 25, "Small cod", group),
         group = ifelse(species == "Cod" & pred_length_cm > 25, "Large cod", group)) |>
  group_by(year, quarter, ices_rect, group) |>
  summarise(n = length(unique(pred_id)),
            tot_prey = sum(value)) |>
  ungroup() |>
  # Smallest group-wise sample size within each year/quarter/rectangle cell
  group_by(year, quarter, ices_rect) |>
  mutate(min_stom = min(n)) |>
  ungroup() |>
  mutate(id = paste(year, quarter, ices_rect, group, sep = "_")) |>
  dplyr::select(-year, -quarter, -ices_rect, -group) |>
  filter(n > 3)
# Prey-group totals per year/quarter/rectangle/group cell, joined to the cell
# totals from qyear_rect_sum to get diet proportions
diet_prop <- d2 |>
  pivot_longer(ends_with("_tot")) |>
  # NOTE(review): changed `< 50` to `<= 50` to match the qyear_rect_sum filter;
  # otherwise 50 cm fish enter the denominator (tot_prey) but not the numerator,
  # so proportions would not sum to 1 in affected cells
  filter(pred_length_cm > 10 & pred_length_cm <= 50) |> # These are the sizes we work with
  left_join(d |> dplyr::select(year, quarter, subdiv, ices_rect, X, Y, pred_id), by = "pred_id") |>
  mutate(group = "Flounder",
         group = ifelse(species == "Cod" & pred_length_cm <= 25, "Small cod", group),
         group = ifelse(species == "Cod" & pred_length_cm > 25, "Large cod", group)) |>
  group_by(year, quarter, ices_rect, group, name) |>
  summarise(tot_prey_by_grp = sum(value)) |>
  ungroup() |>
  mutate(id = paste(year, quarter, ices_rect, group, sep = "_")) |>
  # Keep only cells retained in qyear_rect_sum (i.e. n > 3 stomachs)
  filter(id %in% unique(qyear_rect_sum$id)) |>
  left_join(qyear_rect_sum, by = "id") |>
  mutate(prop_prey = tot_prey_by_grp / tot_prey)
# Calculate overlap by year, quarter, ICES rectangle, and group pair.
# Schoener's index: D = 1 - 0.5 * sum(|p1 - p2|) over prey categories.
# If a group is absent from a cell its wide column is NA, so the overlap is NA
# (these rows are dropped downstream).
diet_prop_wide <- diet_prop |>
  dplyr::select(-tot_prey_by_grp, -tot_prey, -id, -n) |>
  pivot_wider(names_from = group, values_from = prop_prey) |>
  mutate(abs_f_sc = abs(Flounder - `Small cod`),
         abs_f_lc = abs(Flounder - `Large cod`),
         abs_lc_sc = abs(`Large cod` - `Small cod`)) |>
  group_by(year, quarter, ices_rect) |>
  summarise(`Flounder\nSmall cod` = 1 - 0.5*sum(abs_f_sc),
            `Flounder\nLarge cod` = 1 - 0.5*sum(abs_f_lc),
            `Small cod\nLarge cod` = 1 - 0.5*sum(abs_lc_sc))
# Rectangle midpoint coordinates; call ices.rect() once instead of twice
rect_pos <- mapplots::ices.rect(diet_prop_wide$ices_rect)
diet_prop_wide$latitude <- rect_pos$lat
diet_prop_wide$longitude <- rect_pos$lon

# Add UTM columns (warns about multiple zones; zone 33N is picked, which is
# expected for this study area)
diet_prop_wide <- sdmTMB::add_utm_columns(diet_prop_wide)
# Long format: one row per cell and group pair, dropping undefined overlaps
ovr <- diet_prop_wide |>
  pivot_longer(c(`Flounder\nSmall cod`, `Flounder\nLarge cod`, `Small cod\nLarge cod`),
               names_to = "overlap_group", values_to = "overlap") |>
  drop_na(overlap)

summary(ovr$overlap)
# Plot diet overlap per group pair; dashed reference line at 0.6
# plot_map +
#   geom_sf(color = "gray80") +
#   geom_point(data = ovr, aes(X*1000, Y*1000, color = overlap),
#              size = 10) +
#   viridis::scale_color_viridis() +
#   geom_sf() +
#   facet_wrap(~overlap_group, ncol = 2) +
#   NULL

set.seed(99)

ps <- ggplot(ovr, aes(overlap_group, overlap, color = ices_rect)) +
  geom_jitter(height = 0, width = 0.1, alpha = 0.3) +
  geom_boxplot(aes(overlap_group, overlap),
               inherit.aes = FALSE, fill = NA, width = 0.2, alpha = 0.2, size = 0.6) +
  viridis::scale_color_viridis(discrete = TRUE, name = "ICES\nrectangle") +
  geom_hline(yintercept = 0.6, linetype = 2, alpha = 0.6) +
  labs(y = "Schoener's overlap index",
       x = "") +
  theme(legend.position = "bottom",
        legend.key.size = unit(0.01, "cm"),
        legend.text = element_text(size = 7),
        legend.title = element_text(size = 8),
        legend.margin = margin(-0.3, 0, 0, 0, unit = "cm"))

ps
# Which cells have exactly zero overlap?
ovr |> filter(overlap == 0)
# How does the diet overlap relate to biomass density of the species?

# Trawl data
## Read in trawl data. There's one file for cod and one for flounder
# They go up to 2020 Q1
# trawl_surveys_cod <- read.csv("data/stomach-data/BITS/trawl/Trawl Surveys Zincl (L) COD.csv", sep = ";")
# trawl_surveys_fle <- read.csv("data/stomach-data/BITS/trawl/Trawl Surveys Zincl (L) FLE.csv", sep = ";")

# Semicolon-delimited exports
trawl_surveys_cod <- read_delim(paste0(home, "/data/stomach-data/BITS/trawl/Trawl Surveys Zincl (L) COD.csv"), delim = ";")

trawl_surveys_fle <- read_delim(paste0(home, "/data/stomach-data/BITS/trawl/Trawl Surveys Zincl (L) FLE.csv"), delim = ";")
# Combine the two species, filter and clean. The raw files use decimal commas
# for some numeric columns; convert comma to point before coercing to numeric
# (across() replaces three identical gsub/as.numeric lines)
trawl_data <- rbind(trawl_surveys_cod, trawl_surveys_fle) |>
  clean_names() |>
  mutate(across(c(swept_area, kg_hour, dist),
                \(x) as.numeric(gsub(",", ".", x, fixed = TRUE)))) |>
  as.data.frame()
str(trawl_data)

# Positions are degree-minute integers (e.g. 5541 = 55 degrees 41 minutes),
# not decimal degrees
sort(unique(trawl_data$lat)) |> head(20)
# For these coordinates, we can use the function Fede provided:
# Convert degree-minute encoded positions to decimal degrees.
# E.g. 5541 (55 deg 41 min) -> 55 + 41/60; the sign is preserved.
# x: numeric vector of DDMM-encoded positions. Returns a numeric vector.
format.position <- function(x) {
  sign.x <- sign(x)
  x <- abs(x)
  # Left-pad to 4 characters so chars 1-2 are degrees and 3-4 are minutes
  x <- ifelse(nchar(x) == 3, paste0("0", x), x)
  x <- ifelse(nchar(x) == 2, paste0("00", x), x)
  x <- ifelse(nchar(x) == 1, paste0("000", x), x)
  degrees <- as.numeric(substring(x, 1, 2))
  minutes <- as.numeric(substring(x, 3, 4))
  # Return visibly (the original ended in an assignment, returning invisibly)
  sign.x * (degrees + minutes / 60)
}
# Apply the conversion, then add projected UTM coordinates
trawl_data$lat <- format.position(trawl_data$lat)
trawl_data$lon <- format.position(trawl_data$long)

trawl_data <- sdmTMB::add_utm_columns(trawl_data, ll_names = c("lon", "lat"))

# SMHI serial no? (note: `t` shadows base::t here)
t <- trawl_data |> drop_na(smhi_serial_no)

# Quick map of haul positions by year
plot_map +
  geom_point(data = trawl_data, aes(X*1000, Y*1000, color = factor(year)),
             alpha = 0.5, size = 0.3) +
  theme_sleek(base_size = 8)
# Read and join the biological data
trawl_surveys_s_cod <- read_delim(paste0(home, "/data/stomach-data/BITS/biological/Trawl Surveys (S) COD.csv"), delim = ";")

trawl_surveys_s_flounder <- read_delim(paste0(home, "/data/stomach-data/BITS/biological/Trawl Surveys (S) FLE.csv"), delim = ";")

# Harmonize the species label before combining
trawl_surveys_s_cod$Species <- "Cod"
trawl_surveys_s_flounder$Species <- "Flounder"

bio_data <- bind_rows(trawl_surveys_s_cod, trawl_surveys_s_flounder) |>
  clean_names()
# Join the trawl, bio and stomach data. First create a unique ID.
# In earlier versions there was an otolith number column (fish ID), but it is gone.
# Add a species code in the trawl data that matches the stomach data
trawl_data <- trawl_data |> mutate(predator_code = ifelse(species == "cod", "COD", "FLE"))
bio_data <- bio_data |> mutate(predator_code = ifelse(species == "Cod", "COD", "FLE"))

# The ID components must be complete before pasting (expect all FALSE)
unique(is.na(trawl_data[, c("year", "quarter", "haul")]))

# Go ahead
trawl_data$haul_id <- paste(trawl_data$year,
                            trawl_data$quarter,
                            trawl_data$haul,
                            sep = "_")
# Sanity check: haul_id x length class x predator code should be unique
# (the distinct() below should return a single row with n == 1)
trawl_data |>
  group_by(haul_id, lengthcl, predator_code) |>
  mutate(n = n()) |>
  ungroup() |>
  distinct(n)
# One row per haul. Drop every catch-related column so that, after joining the
# per-species summaries back in, missing catches show up as NA (then set to 0)
trawl_data_unique_haul <- trawl_data |>
  dplyr::select(-species, -lengthcl, -predator_code, -kg_hour, -tot_no_hour, -no_hour, -length_measure_type, -sex, -long) |>
  distinct(haul_id, .keep_all = TRUE)
# Cod biomass density (kg per km2) per haul, split into small (100-249 mm) and
# medium (250-499 mm) length classes; a size group absent from a haul becomes 0
trawl_data_cod <- trawl_data |>
  filter(!validity == "I") |>
  filter(predator_code == "COD") |>
  mutate(kg = kg_hour * (duration / 60),
         kg_km2 = kg / swept_area) |> # make it biomass density
  drop_na(kg_km2) |>
  mutate(size_group = ifelse(lengthcl >= 100 & lengthcl < 250, "small", NA),
         size_group = ifelse(lengthcl >= 250 & lengthcl < 500, "medium", size_group)) |>
  group_by(haul_id, size_group) |>
  summarise(kg_km2 = sum(kg_km2)) |>
  # Drop length classes outside both groups (size_group == NA)
  filter(size_group %in% c("small", "medium")) |>
  pivot_wider(names_from = size_group, values_from = kg_km2) |>
  rename(mcod_kg_km2 = medium,
         scod_kg_km2 = small) |>
  mutate(mcod_kg_km2 = replace_na(mcod_kg_km2, 0),
         scod_kg_km2 = replace_na(scod_kg_km2, 0))
# These are catch records, so hauls with no fish are absent; zero densities
# only arise when one of the size groups is missing from a haul.
# Spot check one haul:
trawl_data |>
  filter(predator_code == "COD" & haul_id == "2015_1_20")

trawl_data_cod
# Flounder biomass density per haul; no size split, only "benthic" (>= 100 mm)
# NOTE(review): drop_na(kg_km2) is commented out here but applied for cod above
trawl_data_fle <- trawl_data |>
  filter(!validity == "I") |>
  filter(predator_code == "FLE") |>
  mutate(kg = kg_hour * (duration / 60),
         kg_km2 = kg / swept_area) |> # make it biomass density
  #drop_na(kg_km2) |>
  mutate(size_group = ifelse(lengthcl >= 100, "benthic", "all")) |>
  group_by(haul_id, size_group) |>
  summarise(fle_kg_km2 = sum(kg_km2)) |>
  filter(size_group == "benthic")
# Add back summaries to dataframe of unique hauls
# Apply the same validity filter used for the density summaries so the
# haul-level table and the per-species summaries describe the same hauls
trawl_data_unique_haul <- trawl_data_unique_haul |> filter(!validity == "I")filter: removed 16 rows (2%), 638 rows remaining
# Left joins keep all unique hauls; hauls with no cod/flounder catch get NA
# densities here and are converted to true zeroes below
trawl_data_wide <- left_join(trawl_data_unique_haul, trawl_data_cod, by = "haul_id")left_join: added 2 columns (mcod_kg_km2, scod_kg_km2)
> rows only in x 170
> rows only in y ( 0)
> matched rows 468
> =====
> rows total 638
trawl_data_wide <- left_join(trawl_data_wide, trawl_data_fle, by = "haul_id")left_join: added 2 columns (size_group, fle_kg_km2)
> rows only in x 148
> rows only in y ( 0)
> matched rows 490
> =====
> rows total 638
# Hauls absent from the species summaries had no catch of that group, so the
# NA densities produced by the left joins are true zeroes — recode them.
trawl_data_wide <- trawl_data_wide |>
mutate(across(c(scod_kg_km2, mcod_kg_km2, fle_kg_km2),
\(dens) replace_na(dens, 0)))
# Now add in the same ID in the bio_data
unique(is.na(bio_data[, c("year", "quarter", "haul")])) year quarter haul
[1,] FALSE FALSE FALSE
# Build the same year_quarter_haul key used in the trawl data so the two
# tables can be joined on haul_id
bio_data$haul_id <- with(bio_data, paste(year, quarter, haul, sep = "_"))
unique(bio_data$haul_id) |> head(20) [1] "2015_1_94" "2022_4_249" "2022_4_278" "2021_1_85" "2022_1_53"
[6] "2018_4_9" "2016_4_43" "2017_1_73" "2017_4_2" "2017_4_4"
[11] "2017_4_6" "2019_4_90" "2022_1_82" "2016_1_55" "2022_1_99"
[16] "2022_4_246" "2022_4_254" "2022_4_279" "2021_1_80" "2022_1_57"
# Now join in trawl data into the bio data (and then into stomach data)
colnames(bio_data) [1] "date" "year" "quarter"
[4] "month" "trip_no" "appr_status"
[7] "sampling_type" "seqno" "haul"
[10] "validity" "station_name" "subsam"
[13] "processing" "size" "max_maturity"
[16] "subdiv" "rect" "ices"
[19] "species" "lengthcl" "fishno"
[22] "age" "age_quality" "age_grading"
[25] "weight" "sex" "maturity"
[28] "maturity_no_smsf" "stomach_sample_code" "specimen_note"
[31] "individual_no" "spec_tsn" "day_night"
[34] "predator_code" "haul_id"
colnames(trawl_data_wide) [1] "date" "year" "quarter" "month"
[5] "index" "appr_status" "seqno" "haul"
[9] "validity" "station_name" "subsam" "processing"
[13] "size" "subdiv" "rect" "ices"
[17] "lat" "bottom_depth" "duration" "dist"
[21] "doorspread" "opening" "headropedepth" "swept_area"
[25] "dna_sample_id" "number_to_sample" "smhi_serial_no" "lon"
[29] "X" "Y" "haul_id" "mcod_kg_km2"
[33] "scod_kg_km2" "size_group" "fle_kg_km2"
# Check first for overlapping columns, and if so, if one of the datasets has any NAs
# Column names shared by both tables — these would be suffixed on join
common_cols <- colnames(bio_data) |>
intersect(colnames(trawl_data_wide))
unique(is.na(trawl_data_wide[, common_cols])) date year quarter month appr_status seqno haul validity station_name
[1,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[2,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
subsam processing size subdiv rect ices haul_id
[1,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[2,] TRUE TRUE TRUE FALSE FALSE FALSE FALSE
unique(is.na(bio_data[, common_cols])) date year quarter month appr_status seqno haul validity station_name
[1,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
subsam processing size subdiv rect ices haul_id
[1,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Trawl data has some NA's in the common columns. Select only the important columns
colnames(trawl_data_wide)[!colnames(trawl_data_wide) %in% colnames(bio_data)] [1] "index" "lat" "bottom_depth" "duration"
[5] "dist" "doorspread" "opening" "headropedepth"
[9] "swept_area" "dna_sample_id" "number_to_sample" "smhi_serial_no"
[13] "lon" "X" "Y" "mcod_kg_km2"
[17] "scod_kg_km2" "size_group" "fle_kg_km2"
# Keep only the haul-level variables needed downstream, so the join does not
# duplicate (and potentially conflict with) columns already in bio_data
trawl_data_wide <- trawl_data_wide |>
dplyr::select(
haul_id,
fle_kg_km2, mcod_kg_km2, scod_kg_km2,
lat, lon, bottom_depth,
X, Y,
smhi_serial_no
)
# Join the haul-level trawl covariates onto the individual-level bio data
bio_data <- left_join(bio_data, trawl_data_wide, by = "haul_id")left_join: added 9 columns (fle_kg_km2, mcod_kg_km2, scod_kg_km2, lat, lon, …)
> rows only in x 0
> rows only in y ( 146)
> matched rows 24,327
> ========
> rows total 24,327
names(bio_data) [1] "date" "year" "quarter"
[4] "month" "trip_no" "appr_status"
[7] "sampling_type" "seqno" "haul"
[10] "validity" "station_name" "subsam"
[13] "processing" "size" "max_maturity"
[16] "subdiv" "rect" "ices"
[19] "species" "lengthcl" "fishno"
[22] "age" "age_quality" "age_grading"
[25] "weight" "sex" "maturity"
[28] "maturity_no_smsf" "stomach_sample_code" "specimen_note"
[31] "individual_no" "spec_tsn" "day_night"
[34] "predator_code" "haul_id" "fle_kg_km2"
[37] "mcod_kg_km2" "scod_kg_km2" "lat"
[40] "lon" "bottom_depth" "X"
[43] "Y" "smhi_serial_no"
Summarise haul data on the ICES rectangle level to join into the diet
# Now calculate spatial overlap in biomass density...
# Mean biomass density per species group, averaged over hauls within each
# year x quarter x ICES rectangle cell (the resolution of the diet overlap data)
qyear_rect_sum_biom <- bio_data |>
dplyr::select(fle_kg_km2, mcod_kg_km2, scod_kg_km2, ices, year, quarter) |>
rename(ices_rect = ices) |> #,
group_by(year, quarter, ices_rect) |>
summarise(mean_fle = mean(fle_kg_km2),
mean_scod = mean(scod_kg_km2),
mean_mcod = mean(mcod_kg_km2)) rename: renamed one variable (ices_rect)
summarise: now 188 rows and 6 columns, 2 group variables remaining (year, quarter)
# Join into diet data
# Widen to one column per overlap pair, drop cells missing any pair, then
# attach the rectangle-level mean densities (joined on year/quarter/ices_rect)
ovr <- ovr |>
pivot_wider(names_from = overlap_group, values_from = overlap) |>
drop_na() |>
left_join(qyear_rect_sum_biom)pivot_wider: reorganized (overlap_group, overlap) into (Flounder
Small cod, Flounder
Large cod, Small cod
Large cod) [was 263x9, now 113x10]
drop_na (grouped): removed 38 rows (34%), 75 rows remaining
Joining with `by = join_by(year, quarter, ices_rect)`
left_join: added 3 columns (mean_fle, mean_scod, mean_mcod)
> rows only in x 0
> rows only in y (113)
> matched rows 75
> =====
> rows total 75
Fit zero inflated beta regressions using brms
# https://www.andrewheiss.com/blog/2021/11/08/beta-regression-guide/#zero-inflated-beta-regression-bayesian-style
# Full model
# Reshape back to long (one row per overlap pair per cell) and build the
# scaled biomass covariate for the regression
ovr <- ovr |>
ungroup() |>
clean_names() |>
pivot_longer(c("flounder_small_cod", "flounder_large_cod", "small_cod_large_cod")) |>
# Numerical noise below 1e-15 is treated as a true zero for the zi part
mutate(value = ifelse(value < 1e-15, 0, value),
# NOTE(review): the "/ 3" sits inside scale(), so it has no effect on the
# z-score (scaling by a constant cancels). If the intent was the log of the
# MEAN density it should be log((mean_fle + mean_scod + mean_mcod) / 3);
# the fitted model is unaffected either way — confirm intent.
mean_biom_sc = as.numeric(scale(log(mean_fle + mean_scod + mean_mcod) / 3)),
) #|> ungroup: no grouping variables
pivot_longer: reorganized (flounder_small_cod, flounder_large_cod, small_cod_large_cod) into (name, value) [was 75x13, now 225x12]
# Scale densities before averaging?
# mutate(mean_scod2 = log(mean_scod)/max(log(mean_scod)),
# mean_fle2 = log(mean_fle)/max(log(mean_fle)),
# mean_mcod2 = log(mean_mcod)/max(log(mean_mcod)),
# mean_biom_sc2 = as.numeric(scale(mean_scod2 + mean_fle2 + mean_mcod2) / 3))
# ggplot(ovr, aes(mean_biom_sc, mean_biom_sc2)) +
# geom_point()
# cor(ovr$mean_biom_sc, ovr$mean_biom_sc2)
# Save the overlap data (with scaled biomass covariate) for downstream use.
# Fix: removed a redundant nested paste0() call — the inner paste0 already
# builds the full path.
write_csv(ovr, paste0(home, "/data/clean/s_overlap.csv"))
# Zero-inflated beta model specification: the mean overlap depends on the
# species pair and its interaction with biomass density, while the precision
# (phi) and the zero-inflation probability (zi) vary by pair only.
zib_model <- bf(
value ~ name * mean_biom_sc,
phi ~ name,
zi ~ name,
family = zero_inflated_beta()
)
# Fit the zero-inflated beta model with brms defaults (flat priors on slopes,
# weakly-informative intercept priors; see prior_summary(fit) below)
fit <- brm(
formula = zib_model,
data = ovr,
cores = 4,
chains = 4,
iter = 4000
)Compiling Stan program...
Start sampling
fit Family: zero_inflated_beta
Links: mu = logit; phi = log; zi = logit
Formula: value ~ name * mean_biom_sc
phi ~ name
zi ~ name
Data: ovr (Number of observations: 225)
Draws: 4 chains, each with iter = 4000; warmup = 2000; thin = 1;
total post-warmup draws = 8000
Population-Level Effects:
Estimate Est.Error l-95% CI u-95% CI Rhat
Intercept -2.50 0.14 -2.77 -2.21 1.00
phi_Intercept 2.15 0.19 1.77 2.50 1.00
zi_Intercept -3.66 0.74 -5.34 -2.43 1.00
nameflounder_small_cod 0.79 0.19 0.41 1.15 1.00
namesmall_cod_large_cod 1.71 0.20 1.32 2.09 1.00
mean_biom_sc 0.11 0.12 -0.12 0.35 1.00
nameflounder_small_cod:mean_biom_sc 0.05 0.16 -0.26 0.36 1.00
namesmall_cod_large_cod:mean_biom_sc -0.48 0.18 -0.83 -0.12 1.00
phi_nameflounder_small_cod -0.39 0.25 -0.88 0.11 1.00
phi_namesmall_cod_large_cod -1.28 0.24 -1.75 -0.80 1.00
zi_nameflounder_small_cod -0.78 1.28 -3.53 1.54 1.00
zi_namesmall_cod_large_cod -3.91 3.40 -12.69 0.49 1.00
Bulk_ESS Tail_ESS
Intercept 4687 4829
phi_Intercept 4928 5265
zi_Intercept 9697 6493
nameflounder_small_cod 5423 5291
namesmall_cod_large_cod 5841 5757
mean_biom_sc 5098 5544
nameflounder_small_cod:mean_biom_sc 5367 5910
namesmall_cod_large_cod:mean_biom_sc 6004 6249
phi_nameflounder_small_cod 5675 5797
phi_namesmall_cod_large_cod 5555 6433
zi_nameflounder_small_cod 9463 5151
zi_namesmall_cod_large_cod 3374 2226
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
# https://mc-stan.org/misc/warnings.html
# Rank all parameters by Rhat (largest first) to screen for convergence
# problems. Fix: the original piped through arrange(desc(rhat)) twice (a
# duplicated line with no effect) and computed rhat(fit) twice; compute it
# once and sort a single time.
rhats <- rhat(fit)
tibble(rhat = rhats,
par = names(rhats)) |>
arrange(desc(rhat)) |>
as.data.frame() rhat par
1 1.0015716 lprior
2 1.0008843 b_zi_nameflounder_small_cod
3 1.0008026 b_nameflounder_small_cod
4 1.0005838 b_Intercept
5 1.0005211 b_zi_namesmall_cod_large_cod
6 1.0004156 b_phi_namesmall_cod_large_cod
7 1.0003372 b_nameflounder_small_cod:mean_biom_sc
8 1.0002853 b_namesmall_cod_large_cod
9 1.0002793 lp__
10 1.0002164 b_namesmall_cod_large_cod:mean_biom_sc
11 1.0001093 b_mean_biom_sc
12 1.0000424 b_phi_Intercept
13 0.9999678 b_phi_nameflounder_small_cod
14 0.9999411 b_zi_Intercept
# Zero-inflation coefficients on the logit scale (intercept + pair contrasts)
zi_intercept <- tidy(fit, effects = "fixed") |>
filter(component == "zi") |>
pull(estimate) Warning in tidy.brmsfit(fit, effects = "fixed"): some parameter names contain
underscores: term naming may be unreliable!
filter: removed 9 rows (75%), 3 rows remaining
# Transformed to a probability/proportion
# NOTE(review): only the first element (b_zi_Intercept) is directly a zero
# probability; the other two are treatment contrasts on the logit scale, so
# plogis() of a contrast alone is not that group's zero probability — the
# intercept would need to be added first. Verify this is the intended check.
plogis(zi_intercept)*100 # percent b_zi_Intercept b_zi_nameflounder_small_cod
2.512214 31.458065
b_zi_namesmall_cod_large_cod
1.955686
# Compare with data
# Empirical proportion of exact zeroes, to sanity-check the zi estimates above
ovr |>
count(value == 0) |>
mutate(prop = n / sum(n) *100)count: now 2 rows and 2 columns, ungrouped
# A tibble: 2 × 3
`value == 0` n prop
<lgl> <int> <dbl>
1 FALSE 222 98.7
2 TRUE 3 1.33
get_variables(fit) [1] "b_Intercept"
[2] "b_phi_Intercept"
[3] "b_zi_Intercept"
[4] "b_nameflounder_small_cod"
[5] "b_namesmall_cod_large_cod"
[6] "b_mean_biom_sc"
[7] "b_nameflounder_small_cod:mean_biom_sc"
[8] "b_namesmall_cod_large_cod:mean_biom_sc"
[9] "b_phi_nameflounder_small_cod"
[10] "b_phi_namesmall_cod_large_cod"
[11] "b_zi_nameflounder_small_cod"
[12] "b_zi_namesmall_cod_large_cod"
[13] "lprior"
[14] "lp__"
[15] "accept_stat__"
[16] "stepsize__"
[17] "treedepth__"
[18] "n_leapfrog__"
[19] "divergent__"
[20] "energy__"
plot(fit)# https://discourse.mc-stan.org/t/trying-to-understand-default-prior-for-the-hurdle-part/24337
priors <- prior_summary(fit)
priors prior class coef group resp
(flat) b
(flat) b mean_biom_sc
(flat) b nameflounder_small_cod
(flat) b nameflounder_small_cod:mean_biom_sc
(flat) b namesmall_cod_large_cod
(flat) b namesmall_cod_large_cod:mean_biom_sc
(flat) b
(flat) b nameflounder_small_cod
(flat) b namesmall_cod_large_cod
(flat) b
(flat) b nameflounder_small_cod
(flat) b namesmall_cod_large_cod
student_t(3, 0, 2.5) Intercept
student_t(3, 0, 2.5) Intercept
logistic(0, 1) Intercept
dpar nlpar lb ub source
default
(vectorized)
(vectorized)
(vectorized)
(vectorized)
(vectorized)
phi default
phi (vectorized)
phi (vectorized)
zi default
zi (vectorized)
zi (vectorized)
default
phi default
zi default
stancode(fit)// generated with brms 2.20.4
functions {
/* zero-inflated beta log-PDF of a single response
* Args:
* y: the response value
* mu: mean parameter of the beta distribution
* phi: precision parameter of the beta distribution
* zi: zero-inflation probability
* Returns:
* a scalar to be added to the log posterior
*/
real zero_inflated_beta_lpdf(real y, real mu, real phi, real zi) {
row_vector[2] shape = [mu * phi, (1 - mu) * phi];
if (y == 0) {
return bernoulli_lpmf(1 | zi);
} else {
return bernoulli_lpmf(0 | zi) +
beta_lpdf(y | shape[1], shape[2]);
}
}
/* zero-inflated beta log-PDF of a single response
* logit parameterization of the zero-inflation part
* Args:
* y: the response value
* mu: mean parameter of the beta distribution
* phi: precision parameter of the beta distribution
* zi: linear predictor for zero-inflation part
* Returns:
* a scalar to be added to the log posterior
*/
real zero_inflated_beta_logit_lpdf(real y, real mu, real phi, real zi) {
row_vector[2] shape = [mu * phi, (1 - mu) * phi];
if (y == 0) {
return bernoulli_logit_lpmf(1 | zi);
} else {
return bernoulli_logit_lpmf(0 | zi) +
beta_lpdf(y | shape[1], shape[2]);
}
}
// zero-inflated beta log-CCDF and log-CDF functions
real zero_inflated_beta_lccdf(real y, real mu, real phi, real zi) {
row_vector[2] shape = [mu * phi, (1 - mu) * phi];
return bernoulli_lpmf(0 | zi) + beta_lccdf(y | shape[1], shape[2]);
}
real zero_inflated_beta_lcdf(real y, real mu, real phi, real zi) {
return log1m_exp(zero_inflated_beta_lccdf(y | mu, phi, zi));
}
}
data {
int<lower=1> N; // total number of observations
vector[N] Y; // response variable
int<lower=1> K; // number of population-level effects
matrix[N, K] X; // population-level design matrix
int<lower=1> Kc; // number of population-level effects after centering
int<lower=1> K_phi; // number of population-level effects
matrix[N, K_phi] X_phi; // population-level design matrix
int<lower=1> Kc_phi; // number of population-level effects after centering
int<lower=1> K_zi; // number of population-level effects
matrix[N, K_zi] X_zi; // population-level design matrix
int<lower=1> Kc_zi; // number of population-level effects after centering
int prior_only; // should the likelihood be ignored?
}
transformed data {
matrix[N, Kc] Xc; // centered version of X without an intercept
vector[Kc] means_X; // column means of X before centering
matrix[N, Kc_phi] Xc_phi; // centered version of X_phi without an intercept
vector[Kc_phi] means_X_phi; // column means of X_phi before centering
matrix[N, Kc_zi] Xc_zi; // centered version of X_zi without an intercept
vector[Kc_zi] means_X_zi; // column means of X_zi before centering
for (i in 2:K) {
means_X[i - 1] = mean(X[, i]);
Xc[, i - 1] = X[, i] - means_X[i - 1];
}
for (i in 2:K_phi) {
means_X_phi[i - 1] = mean(X_phi[, i]);
Xc_phi[, i - 1] = X_phi[, i] - means_X_phi[i - 1];
}
for (i in 2:K_zi) {
means_X_zi[i - 1] = mean(X_zi[, i]);
Xc_zi[, i - 1] = X_zi[, i] - means_X_zi[i - 1];
}
}
parameters {
vector[Kc] b; // regression coefficients
real Intercept; // temporary intercept for centered predictors
vector[Kc_phi] b_phi; // regression coefficients
real Intercept_phi; // temporary intercept for centered predictors
vector[Kc_zi] b_zi; // regression coefficients
real Intercept_zi; // temporary intercept for centered predictors
}
transformed parameters {
real lprior = 0; // prior contributions to the log posterior
lprior += student_t_lpdf(Intercept | 3, 0, 2.5);
lprior += student_t_lpdf(Intercept_phi | 3, 0, 2.5);
lprior += logistic_lpdf(Intercept_zi | 0, 1);
}
model {
// likelihood including constants
if (!prior_only) {
// initialize linear predictor term
vector[N] mu = rep_vector(0.0, N);
// initialize linear predictor term
vector[N] phi = rep_vector(0.0, N);
// initialize linear predictor term
vector[N] zi = rep_vector(0.0, N);
mu += Intercept + Xc * b;
phi += Intercept_phi + Xc_phi * b_phi;
zi += Intercept_zi + Xc_zi * b_zi;
mu = inv_logit(mu);
phi = exp(phi);
for (n in 1:N) {
target += zero_inflated_beta_logit_lpdf(Y[n] | mu[n], phi[n], zi[n]);
}
}
// priors including constants
target += lprior;
}
generated quantities {
// actual population-level intercept
real b_Intercept = Intercept - dot_product(means_X, b);
// actual population-level intercept
real b_phi_Intercept = Intercept_phi - dot_product(means_X_phi, b_phi);
// actual population-level intercept
real b_zi_Intercept = Intercept_zi - dot_product(means_X_zi, b_zi);
}
# Posterior predictive check (density overlay) saved as a supplementary figure
pp_check(fit) +
coord_cartesian(expand = TRUE) +
labs(x = "Schoener's overlap index",
y = "Density") +
theme(legend.position.inside = c(0.8, 0.8)) +
guides(color = guide_legend(position = "inside"))Using 10 posterior draws for ppc type 'dens_overlay' by default.
Coordinate system already present. Adding new coordinate system, which will
replace the existing one.
ggsave(paste0(home, "/figures/supp/schoener_zib_pp_check.pdf"), width = 11, height = 11, units = "cm")
conditional_effects(fit)set.seed(123)
# Human-readable two-line labels for the three overlap pairs (used in plots);
# any unmatched value falls back to the raw name
ovr <- ovr |>
mutate(name2 = case_when(
name == "flounder_large_cod" ~ "Flounder\nLarge cod",
name == "flounder_small_cod" ~ "Flounder\nSmall cod",
name == "small_cod_large_cod" ~ "Small cod\nLarge cod",
TRUE ~ name
))
# Panel a: posterior mean overlap per pair at average biomass density
# (mean_biom_sc = 0), with raw data jittered behind the point intervals
p1 <- ovr |>
data_grid(name = unique(name),
mean_biom_sc = 0) |>
mutate(name2 = ifelse(name == "flounder_large_cod", "Flounder\nLarge cod", name),
name2 = ifelse(name == "flounder_small_cod", "Flounder\nSmall cod", name2),
name2 = ifelse(name == "small_cod_large_cod", "Small cod\nLarge cod", name2)) |>
add_epred_draws(fit) |>
ggplot(aes(.epred, name2)) +
geom_jitter(data = ovr, aes(value, name2, color = name2), height = 0.2, width = 0, alpha = 0.4) +
# 66% and 95% posterior intervals of the expected overlap
stat_pointinterval(.width = c(.66, .95), color = "gray30") +
# 0.6 is the conventional threshold for "biologically significant" overlap
geom_vline(xintercept = 0.6, linetype = 2, alpha = 0.6) +
scale_color_brewer(palette = "Set1") +
labs(x = "Schoener's overlap index", y = "", color = "") +
geom_stripped_rows(color = NA) +
coord_cartesian(expand = 0, xlim = c(-0.03, 1)) +
theme(plot.margin = unit(c(0, 0.3, 0, 0), "cm")) +
guides(color = "none") +
NULL
# Diverging red-blue palette; NOTE(review): `pal` appears unused in the
# remainder of this chunk — confirm before removing
pal <- brewer.pal(n = 7, name = "RdBu")
# Check CI's
# Per-pair slopes are intercept slope + the pair's interaction term
# (treatment coding); here we inspect the small cod / large cod slope
fit |>
spread_draws(b_mean_biom_sc,
`b_nameflounder_small_cod:mean_biom_sc`,
`b_namesmall_cod_large_cod:mean_biom_sc`) |>
mutate("Flounder\nLarge cod" = b_mean_biom_sc,
"Flounder\nSmall cod" = b_mean_biom_sc + `b_nameflounder_small_cod:mean_biom_sc`,
"Small cod\nLarge cod" = b_mean_biom_sc + `b_namesmall_cod_large_cod:mean_biom_sc`,
) |>
# NOTE(review): probs = 0.845 is an unusual upper quantile — confirm this is
# deliberate (e.g. probing where the interval crosses zero) and not a typo
summarise(mean = mean(`Small cod\nLarge cod`),
upr = quantile(`Small cod\nLarge cod`, probs = 0.845))summarise: now one row and 2 columns, ungrouped
# A tibble: 1 × 2
mean upr
<dbl> <dbl>
1 -0.365 -0.232
# Panel b: posterior distributions of the per-pair slopes of biomass density
# on mean overlap, shaded by the sign of the draw (alpha maps x < 0)
p2 <- fit |>
spread_draws(b_mean_biom_sc,
`b_nameflounder_small_cod:mean_biom_sc`,
`b_namesmall_cod_large_cod:mean_biom_sc`) |>
# Reconstruct each pair's slope from the reference slope + interaction term
mutate("Flounder\nLarge cod" = b_mean_biom_sc,
"Flounder\nSmall cod" = b_mean_biom_sc + `b_nameflounder_small_cod:mean_biom_sc`,
"Small cod\nLarge cod" = b_mean_biom_sc + `b_namesmall_cod_large_cod:mean_biom_sc`,
) |>
pivot_longer(c(`Flounder\nLarge cod`, `Flounder\nSmall cod`, `Small cod\nLarge cod`),
names_to = "name2", values_to = "slope") |>
#ggplot(aes(slope, `Overlap group`, fill = after_stat(x < 0))) +
ggplot(aes(slope, name2, fill = name2, color = name2, alpha = after_stat(x < 0))) +
scale_fill_brewer(palette = "Set1", name = "") +
scale_color_brewer(palette = "Set1", name = "") +
scale_alpha_manual(values = c(0.4, 0.7)) +
ggdist::stat_eye() +
stat_pointinterval(.width = c(.66, .95), color = "gray10") +
labs(y = "", x = "Slope of biomass density\non the mean Schoener's diet overlap") +
geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.6) +
theme(legend.position.inside = c(0.15, 0.1)) +
guides(alpha = guide_legend(position = "inside", override.aes = list(alpha = c(0.2, 0.8))),
fill = "none", color = "none") +
geom_stripped_rows(aes(slope, name2, fill = name2),
inherit.aes = FALSE) +
coord_cartesian(expand = 0) +
NULL
NULLpivot_longer: reorganized (Flounder
Large cod, Flounder
Small cod, Small cod
Large cod) into (name2, slope) [was 8000x9, now 24000x8]
Warning in geom_stripped_rows(aes(slope, name2, fill = name2), inherit.aes =
FALSE): Ignoring unknown aesthetics: x and fill
# Conditional
# Panel c: conditional effect of scaled biomass density on expected overlap,
# one 95% line-ribbon per pair, with observed values overplotted
p3 <- ovr |>
data_grid(name = unique(name),
mean_biom_sc = seq_range(mean_biom_sc, n = 101)) |>
mutate(name2 = ifelse(name == "flounder_large_cod", "Flounder\nLarge cod", name),
name2 = ifelse(name == "flounder_small_cod", "Flounder\nSmall cod", name2),
name2 = ifelse(name == "small_cod_large_cod", "Small cod\nLarge cod", name2)) |>
add_epred_draws(fit) |>
ggplot(aes(x = mean_biom_sc, y = value, fill = name2, color = name2)) +
stat_lineribbon(aes(y = .epred), .width = c(.95), alpha = 0.6) +
geom_point(data = ovr, alpha = 0.5) +
scale_fill_brewer(palette = "Pastel1") +
scale_color_brewer(palette = "Set1") +
theme(legend.position.inside = c(0.90, 0.84)) +
labs(y = "Schoener's overlap index", x = "Scaled cod and flounder mean biomass density",
fill = "", color = "") +
guides(fill = guide_legend(position = "inside"))
# Assemble panels: (a, b) side by side with collected axes, (c) below,
# then save the combined figure
p3pp <- (p1 + p2) + plot_layout(axes = "collect")
pp / p3 +
plot_annotation(tag_levels = "a")ggsave(paste0(home, "/figures/schoener_zib.pdf"), width = 17, height = 20, units = "cm")
# What's the actual predictions
# Posterior mean and 95% interval of expected overlap per pair at average
# biomass density. NOTE(review): the name `sum` masks base::sum from here on —
# consider renaming (e.g. pred_sum) to avoid surprises in later code.
sum <- ovr |>
data_grid(name = unique(name),
mean_biom_sc = 0) |>
mutate(name2 = ifelse(name == "flounder_large_cod", "Flounder\nLarge cod", name),
name2 = ifelse(name == "flounder_small_cod", "Flounder\nSmall cod", name2),
name2 = ifelse(name == "small_cod_large_cod", "Small cod\nLarge cod", name2)) |>
add_epred_draws(fit) |>
group_by(name) |>
summarise(mean = mean(.epred),
lwr = quantile(.epred, probs = 0.025),
upr = quantile(.epred, probs = 0.975))summarise: now 3 rows and 4 columns, ungrouped
sum# A tibble: 3 × 4
name mean lwr upr
<chr> <dbl> <dbl> <dbl>
1 flounder_large_cod 0.0741 0.0568 0.0959
2 flounder_small_cod 0.151 0.122 0.185
3 small_cod_large_cod 0.311 0.258 0.370
# Overlay the posterior mean predictions on panel a as vertical lines
# (`sum` here is the prediction summary tibble created above, not base::sum)
p1 + geom_vline(data = sum, aes(xintercept = mean))knitr::knit_exit()